# Import PAM data
um <- read_csv("data/20201014_UM_Acer/PAM/UM_pamdata.csv")
crf <- read_csv("data/20201013_CRF_Acer/PAM/CRF_pamdata.csv")
rrt <- read_csv("data/20201007_RRT_Acer/PAM/RRT_Acer_pamdata.csv")
mote <- read_csv("data/20201011_Mote_Acer/PAM/Mote_pamdata.csv")
fwc <- read_csv("data/20201010_FWC_Acer/PAM/FWC_pamdata.csv")

df <- list(um = um, crf = crf, rrt = rrt, mote = mote, fwc = fwc) %>%
  bind_rows(.id = "nursery")

df %>% distinct(nursery, geno) %>% nrow(.)
## [1] 193
# Initial filtering
dff <- df %>%
  # Omit rows for one coral that had missing pam data at 32°C
  filter(!(geno == "ML1" & nursery == "rrt" & max_temp == 32)) %>%
  # Save raw fvfm in new column
  mutate(fvfmraw = fvfm) %>%          
  # Make missing values zero (high temp / no signal)
  replace_na(replace = list(fvfm = 0)) %>%      
  # Identify problematic data points
  mutate(problem = case_when(
    max_temp >= 36 & fvfm >= 0.4 & f <= 250 ~ "abnormally high w/ low Ft",
    fvfm > 0.750 ~ "abnormally high",
    TRUE ~ "none")) %>%
  mutate(fvfm = case_when(
    problem == "abnormally high w/ low Ft" ~ 0,     # Change abnormally high values at high temps to zero
    problem == "abnormally high" ~ NA_real_,    # Change all values > 0.750 to NA
    TRUE ~ fvfm))

# Plot all data points
ggplot(dff, aes(x = max_temp, y = fvfm)) +
  geom_point(alpha = 0.2)

# Fit 3-parameter LL model to each coral individually (faster than all together) for initial outlier detection

# Define function to fit 3-parameter LL model to data and return NULL if fitting error
ll3 <- function(data) {
  drm(fvfm ~ max_temp, data = drop_na(data, fvfm), 
      fct = LL.3(names = c("hill", "max", "ED50")),
      upperl = c(100, 0.67, NA),
      lowerl = c(NA, 0.66, NA))}
tryll3 <- possibly(ll3, otherwise = NULL)

# Fit model to each coral, get parameters, fitted values, and residuals
initmods <- dff %>%
  nest(data = c(f, fm, fvfm, fvfmraw, tank_id, tank_set, max_temp, problem)) %>%
  # Fit the model to each coral
  mutate(ll3 = map(data, tryll3)) %>%
  # Get model parameters and fitted values/residuals
  mutate(pars = map(ll3, tidy),
         pred = map2(ll3, data, ~augment(.x, drop_na(.y, fvfm))))
## Error in optim(startVec, opfct, hessian = TRUE, method = "L-BFGS-B", lower = lowerLimits,  : 
##   non-finite finite-difference value [3]
## Error in optim(startVec, opfct, hessian = TRUE, method = "L-BFGS-B", lower = lowerLimits,  : 
##   non-finite finite-difference value [3]
## Error in optim(startVec, opfct, hessian = TRUE, method = "L-BFGS-B", lower = lowerLimits,  : 
##   non-finite finite-difference value [3]
pars <- initmods %>% 
  select(nursery, geno, pars) %>%
  unnest(pars) %>%
  filter(term == "ED50")

vals <- initmods %>%
  select(nursery, geno, pred) %>%
  unnest(pred) %>%
  full_join(pars) %>%
  full_join(dff) %>%
  rename(ed50 = estimate)


# Define function to plot raw data, fitted values, and ed50 for each genotype
plotfits <- function(data) {
  ggplot(data = data, aes(x = max_temp)) +
    geom_point(aes(y = fvfmraw, color = problem), pch = 4, size = 1.25) +
    geom_point(aes(y = fvfm), pch = 1, size = 2) +
    geom_line(data = drop_na(data, .fitted),
              aes(y = .fitted)) +
    geom_vline(data = distinct(data, geno, ed50), 
               aes(xintercept = ed50), 
               lwd = 0.2, lty = 2) +
    geom_text(data = distinct(data, geno, ed50),
              aes(x = ed50, y = 0.05, label = round(ed50, 2)), 
              hjust = 1, nudge_x = -0.2, size = 3) +
    facet_wrap(~ geno) +
    scale_color_manual(values = c("black", "red", "orange", "blue"), drop = FALSE)
}

# Plot fits for each nursery
# plotfits(data = filter(vals, nursery == "rrt"))
# plotfits(data = filter(vals, nursery == "fwc"))
# plotfits(data = filter(vals, nursery == "mote"))
# plotfits(data = filter(vals, nursery == "crf"))
# plotfits(data = filter(vals, nursery == "um"))

# Remove outlier datapoints
dfff <- vals %>%
  mutate(problem = case_when(.resid > 0.15 ~ "high residual from fitted", TRUE ~ problem)) %>%
  mutate(problem = factor(problem, levels = c("none", "abnormally high", "abnormally high w/ low Ft", 
                                              "high residual from fitted"))) %>%
  mutate(fvfm = case_when(problem == "high residual from fitted" ~ NA_real_, TRUE ~ fvfm)) %>%
  select(nursery, geno, f, fm, fvfm, fvfmraw, tank_id, tank_set, max_temp, problem)
# Refit models individually without outliers
# Fit model to each coral, get parameters, fitted values, and residuals
fmods <- dfff %>%
  nest(data = c(f, fm, fvfm, fvfmraw, tank_id, tank_set, max_temp, problem)) %>%
  # Fit the model to each coral
  mutate(ll3 = map(data, tryll3)) %>%
  # Get model parameters and fitted values/residuals
  mutate(pars = map(ll3, tidy),
         pred = map2(ll3, data, ~augment(.x, drop_na(.y, fvfm))))
## Error in optim(startVec, opfct, hessian = TRUE, method = "L-BFGS-B", lower = lowerLimits,  : 
##   non-finite finite-difference value [3]
## Error in optim(startVec, opfct, hessian = TRUE, method = "L-BFGS-B", lower = lowerLimits,  : 
##   non-finite finite-difference value [3]
## Error in optim(startVec, opfct, hessian = TRUE, method = "L-BFGS-B", lower = lowerLimits,  : 
##   non-finite finite-difference value [3]
fpars <- fmods %>% 
  select(nursery, geno, pars) %>%
  unnest(pars) %>%
  filter(term == "ED50")

fvals <- fmods %>%
  select(nursery, geno, pred) %>%
  unnest(pred) %>%
  full_join(fpars) %>%
  full_join(dfff) %>%
  rename(ed50 = estimate)

# plotfits(data = filter(fvals, nursery == "rrt"))
# plotfits(data = filter(fvals, nursery == "fwc"))
# plotfits(data = filter(fvals, nursery == "mote"))
# plotfits(data = filter(fvals, nursery == "crf"))
# plotfits(data = filter(fvals, nursery == "um"))

# fvals %>%
#   mutate(genoN = paste0("g", group_indices(dfff, nursery, geno))) %>%
#   ggplot(aes(x = fct_reorder(genoN, ed50), y = ed50)) +
#   geom_point() +
#   geom_errorbar(aes(ymin = ed50 - std.error, ymax = ed50 + std.error)) +
#   facet_grid(~ nursery, scales = "free_x") +
#   scale_y_continuous(limits = c(34.5, 38))
# Refit genos all together without outliers, and fixed maximum across genos
dfff$genoN <- factor(paste0("g", group_indices(dfff, nursery, geno)))
# m3f <- drm(fvfm ~ max_temp, genoN, data = dfff,
#           fct = LL.3(names = c('hill', 'max', 'ED50')),
#           upperl = c(100, NA, NA),    # upper limit of 100 on hill parameter
#           pmodels = data.frame(genoN, 1, genoN))  # all genos have fixed maximum
# 
# save(m3f, file = "data/ll3modf.RData")
load("data/ll3modf.RData")

# Tidy model output and join ED50 estimates with sample metadata
res3f <- tidy(m3f) %>%
  filter(term == "ED50") %>%
  mutate(genoN = curve) %>%
  left_join(distinct(dfff, nursery, geno, genoN)) %>%
  mutate(genoN = factor(genoN, levels = genoN[order(estimate)]))
# Get fitted/predicted values to look at fits for each genotype
pred3f <- augment(m3f, data.frame(m3f$data)) %>%
  mutate(genoN = genoN.1) %>%
  left_join(select(res3f, geno, genoN, nursery, ed50 = estimate)) %>%         # add ed50 value for each genotype
  full_join(dfff) %>%
  mutate(problem = factor(problem, levels = levels(dfff$problem))) %>%
  mutate(genoN = factor(genoN, levels = levels(res3f$genoN)))

# Plot for each nursery
plotfits(data = filter(pred3f, nursery == "rrt"))

plotfits(data = filter(pred3f, nursery == "fwc"))

plotfits(data = filter(pred3f, nursery == "mote"))

plotfits(data = filter(pred3f, nursery == "crf"))

plotfits(data = filter(pred3f, nursery == "um"))

# Plot ED50 estimates for all genotypes
res3f %>%
  ggplot(aes(x = genoN, y = estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin = estimate - std.error, ymax = estimate + std.error)) +
  facet_grid(~ nursery, scales = "free_x") +
  scale_x_discrete(labels = res3f$geno[order(res3f$genoN)])